import math
import random
import pandas as pd
from collections import defaultdict
from operator import itemgetter

# Load the data and split it into training and test user-item tables.
def LoadGameEventData(df, train_rate):
    """Randomly split the (league, item) pairs of ``df`` into train and test tables.

    Every row is drawn independently: it goes to the training set when a
    uniform draw from [0, 1) is below ``train_rate``, otherwise to the test
    set. The generator is seeded with 3, so the split is reproducible.

    :param df: pandas DataFrame containing the columns ``'item'`` and ``'league'``
    :param train_rate: threshold compared against each uniform draw; the
        share of rows expected to land in the training set
    :return: tuple ``(train, test)``, each the result of ``PreProcessData``
        applied to the ``[league, item]`` pairs of that split
    :raises KeyError: if ``df`` lacks the ``'item'`` or ``'league'`` column
    """
    ratings = df[['item', 'league']]
    # A dedicated generator yields the same sequence as random.seed(3) would,
    # without reseeding the process-wide generator as a side effect.
    rng = random.Random(3)
    train = []
    test = []
    # Walk the two columns directly: iterrows() builds one Series per row and
    # may coerce values to a common dtype (e.g. int ids turning into floats).
    for item, league in zip(ratings['item'], ratings['league']):
        bucket = train if rng.random() < train_rate else test
        bucket.append([league, item])
    return PreProcessData(train), PreProcessData(test)


def PreProcessData(originData):
    """Build the user -> items table from (user, item) pairs.

    The result has the following structure:
        {"User1": {Item1, Item2, Item3, ...},
         "User2": {Item12, Item5, Item8, ...},
         ...
        }

    :param originData: iterable of two-element ``(user, item)`` sequences
    :return: plain dict mapping each user to the set of items paired with it;
        an empty dict when ``originData`` is empty
    """
    trainData = dict()
    for user, item in originData:
        # setdefault returns the existing set, or installs a fresh one.
        trainData.setdefault(user, set()).add(item)
    return trainData


class ItemCF(object):
    """Item-based collaborative filtering over a user -> set-of-items table."""

    def __init__(self, trainData, similarity="cosine", norm=True):
        """
        :param trainData: dict mapping each user to the set of items it likes
        :param similarity: co-occurrence weighting, ``"cosine"`` or ``"iuf"``
        :param norm: when true, each item's similarity row is divided by its
            largest value
        """
        self._trainData = trainData
        self._similarity = similarity
        self._isNorm = norm
        self._itemSimMatrix = dict()  # item-item similarity matrix

    # The co-occurrence matrix is not built per user and then summed; a
    # two-level dict serves as the matrix and is accumulated into directly.

    def similarity(self):
        """Compute the item-item similarity matrix from the training data.

        The matrix is rebuilt from scratch on every call, so calling this
        (or ``train``) repeatedly always gives the same result.
        """
        N = defaultdict(int)  # number of users that like each item
        matrix = dict()
        for items in self._trainData.values():
            # The contribution of one user is the same for all of its item
            # pairs, so compute it once per user.
            if self._similarity == "cosine":
                weight = 1
            elif self._similarity == "iuf":
                # Users with many items contribute less to each pair.
                weight = 1. / math.log1p(len(items) * 1.)
            else:
                weight = 0  # unrecognised name: pairs are recorded with weight 0
            for i in items:
                row = matrix.setdefault(i, dict())
                N[i] += 1
                for j in items:
                    if i == j:
                        continue
                    row[j] = row.get(j, 0) + weight
        # Turn the accumulated co-occurrence weights into similarities.
        for i, related_items in matrix.items():
            for j, cij in related_items.items():
                related_items[j] = cij / math.sqrt(N[i] * N[j])
        # Optionally scale every row so that its largest similarity is 1.
        if self._isNorm:
            for i, relations in matrix.items():
                if not relations:
                    continue  # item never co-occurs with another: nothing to scale
                max_num = max(relations.values())
                if not max_num:
                    continue  # all-zero row: avoid dividing by zero
                matrix[i] = {k: v / max_num for k, v in relations.items()}
        self._itemSimMatrix = matrix

    def recommend(self, user, N, K):
        """
        :param user: the user to recommend items to
        :param N: number of items to recommend
        :param K: number of most similar items looked up for each item the user likes
        :return: dict of at most N items mapped to their scores, ordered by
            descending score
        :raises KeyError: if ``user`` is not in the training data, or one of
            its items is missing from the similarity matrix
        """
        recommends = dict()
        # Items the user already likes.
        items = self._trainData[user]
        for item in items:
            # Take the K items most similar to each liked item.
            for i, sim in sorted(self._itemSimMatrix[item].items(), key=itemgetter(1), reverse=True)[:K]:
                if i in items:
                    continue  # already liked by the user, skip
                recommends[i] = recommends.get(i, 0.) + sim
        # Rank candidates by accumulated similarity and keep the top N.
        return dict(sorted(recommends.items(), key=itemgetter(1), reverse=True)[:N])

    def train(self):
        """Train the model by computing the item similarity matrix."""
        self.similarity()


if __name__ == "__main__":
    # Example usage, kept commented out; running this file as a script does nothing.
    # NOTE(review): the example calls LoadMovieLensData, which is not defined in the
    # visible part of this file — confirm the loader before re-enabling it.
    # train, test = LoadMovieLensData("../Data/ml-1m/ratings.dat", 0.8)
    # print("train data size: %d, test data size: %d" % (len(train), len(test)))
    # ItemCF = ItemCF(train, similarity='iuf', norm=True)
    # ItemCF.train()
    #
    # # Recommend items for each of the following 4 users
    # print(ItemCF.recommend(1, 5, 80))
    pass